1 Assembly process statistics

1.1 All contigs

# ---- Results ----

# ---- Quast descriptive stats ----

table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs)
Overall
(N=84)
# contigs (>= 0 bp)
Mean (SD) 120 (333)
Median [Min, Max] 45.0 [19.0, 2590]
N50
Mean (SD) 585000 (417000)
Median [Min, Max] 390000 [20900, 1640000]
L50
Mean (SD) 3.24 (4.36)
Median [Min, Max] 3.00 [1.00, 39.0]
Largest contig
Mean (SD) 883000 (367000)
Median [Min, Max] 836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD) 2640000 (291000)
Median [Min, Max] 2660000 [1830000, 3470000]
table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs_filtered)
Overall
(N=84)
# contigs (>= 0 bp)
Mean (SD) 48.4 (137)
Median [Min, Max] 20.0 [7.00, 1100]
N50
Mean (SD) 585000 (417000)
Median [Min, Max] 390000 [20900, 1640000]
L50
Mean (SD) 3.24 (4.36)
Median [Min, Max] 3.00 [1.00, 39.0]
Largest contig
Mean (SD) 883000 (367000)
Median [Min, Max] 836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD) 2610000 (276000)
Median [Min, Max] 2650000 [1830000, 3230000]

1.2 Contigs >500 bp (used for analysis)

# ---- Quast descriptive stats ----

table1(~`# contigs (>= 0 bp)` + N50 + L50 + `Largest contig` + `Total length (>= 0 bp)`, data= quast_wgs_filtered)
Overall
(N=84)
# contigs (>= 0 bp)
Mean (SD) 48.4 (137)
Median [Min, Max] 20.0 [7.00, 1100]
N50
Mean (SD) 585000 (417000)
Median [Min, Max] 390000 [20900, 1640000]
L50
Mean (SD) 3.24 (4.36)
Median [Min, Max] 3.00 [1.00, 39.0]
Largest contig
Mean (SD) 883000 (367000)
Median [Min, Max] 836000 [77900, 1640000]
Total length (>= 0 bp)
Mean (SD) 2610000 (276000)
Median [Min, Max] 2650000 [1830000, 3230000]

2 AMPs genes

# Presence of AMP related genes

# Prevalence AMPs

table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|wgs_species_3,data=subset(metadata_bagel,metadata_bagel$wgs_species_3!="SAU"))
SCH
(N=21)
SCON
(N=1)
SDEV
(N=2)
SEQ
(N=1)
SHAEM
(N=19)
SPXYL
(N=5)
SSC
(N=6)
SSUC
(N=12)
SXYL
(N=13)
Overall
(N=80)
mic_sau
Mean (SD) 3.32 (0.497) 4.59 (NA) 4.29 (0.297) NA (NA) 3.97 (0.648) 3.06 (0.522) 3.33 (0.560) 2.92 (0.692) 3.05 (0.394) 3.37 (0.675)
Median [Min, Max] 3.25 [2.59, 4.51] 4.59 [4.59, 4.59] 4.29 [4.08, 4.50] NA [NA, NA] 4.14 [2.16, 4.66] 3.01 [2.61, 3.93] 3.49 [2.60, 4.03] 3.03 [1.92, 4.11] 2.90 [2.71, 4.09] 3.24 [1.92, 4.66]
Missing 2 (9.5%) 0 (0%) 0 (0%) 1 (100%) 4 (21.1%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 8 (10.0%)
mic_sub
Mean (SD) 3.06 (0.604) NA (NA) 3.29 (0.297) NA (NA) 3.51 (0.775) 3.06 (0.522) 3.49 (0.480) 2.92 (0.931) 3.05 (0.698) 3.13 (0.687)
Median [Min, Max] 3.00 [2.51, 5.18] NA [NA, NA] 3.29 [3.08, 3.50] NA [NA, NA] 3.71 [2.16, 4.27] 3.01 [2.61, 3.93] 3.59 [2.60, 4.03] 2.58 [1.65, 4.14] 2.93 [1.71, 4.09] 3.10 [1.65, 5.18]
Missing 2 (9.5%) 1 (100%) 0 (0%) 1 (100%) 13 (68.4%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 18 (22.5%)
sau_inhibition_category
>70 1 (4.8%) 1 (100%) 0 (0%) 0 (0%) 2 (10.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4 (5.0%)
10-20 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (20.0%) 1 (16.7%) 0 (0%) 6 (46.2%) 10 (12.5%)
20-30 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 2 (40.0%) 0 (0%) 3 (25.0%) 2 (15.4%) 10 (12.5%)
30-40 6 (28.6%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (16.7%) 0 (0%) 3 (23.1%) 10 (12.5%)
40-50 4 (19.0%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 0 (0%) 2 (33.3%) 2 (16.7%) 1 (7.7%) 10 (12.5%)
50-60 1 (4.8%) 0 (0%) 1 (50.0%) 0 (0%) 4 (21.1%) 1 (20.0%) 1 (16.7%) 0 (0%) 1 (7.7%) 9 (11.3%)
60-70 1 (4.8%) 0 (0%) 1 (50.0%) 0 (0%) 6 (31.6%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 9 (11.3%)
Top 10 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 1 (20.0%) 1 (16.7%) 5 (41.7%) 0 (0%) 10 (12.5%)
Undetermined MIC 0 (0%) 0 (0%) 0 (0%) 1 (100%) 4 (21.1%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (6.3%)
Missing 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 3 (3.8%)
sub_inhibition_category
>70 1 (4.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.3%)
10-20 5 (23.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (40.0%) 1 (16.7%) 1 (8.3%) 1 (7.7%) 10 (12.5%)
20-30 3 (14.3%) 0 (0%) 1 (50.0%) 0 (0%) 0 (0%) 2 (40.0%) 0 (0%) 0 (0%) 4 (30.8%) 10 (12.5%)
30-40 5 (23.8%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 0 (0%) 0 (0%) 1 (8.3%) 2 (15.4%) 9 (11.3%)
40-50 3 (14.3%) 0 (0%) 1 (50.0%) 0 (0%) 1 (5.3%) 0 (0%) 3 (50.0%) 1 (8.3%) 0 (0%) 9 (11.3%)
Top 10 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 0 (0%) 0 (0%) 5 (41.7%) 2 (15.4%) 10 (12.5%)
Undetermined MIC 0 (0%) 1 (100%) 0 (0%) 1 (100%) 13 (68.4%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 15 (18.8%)
50-60 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (5.3%) 1 (20.0%) 2 (33.3%) 1 (8.3%) 3 (23.1%) 8 (10.0%)
60-70 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (10.5%) 0 (0%) 0 (0%) 2 (16.7%) 1 (7.7%) 5 (6.3%)
Missing 2 (9.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (8.3%) 0 (0%) 3 (3.8%)
case_control
Control 12 (57.1%) 0 (0%) 2 (100%) 1 (100%) 10 (52.6%) 2 (40.0%) 3 (50.0%) 6 (50.0%) 6 (46.2%) 42 (52.5%)
Case 9 (42.9%) 1 (100%) 0 (0%) 0 (0%) 9 (47.4%) 3 (60.0%) 3 (50.0%) 6 (50.0%) 7 (53.8%) 38 (47.5%)
aip_1
0 5 (23.8%) 1 (100%) 2 (100%) 1 (100%) 0 (0%) 5 (100%) 3 (50.0%) 12 (100%) 13 (100%) 42 (52.5%)
1 16 (76.2%) 0 (0%) 0 (0%) 0 (0%) 19 (100%) 0 (0%) 3 (50.0%) 0 (0%) 0 (0%) 38 (47.5%)
aip_2
0 16 (76.2%) 0 (0%) 2 (100%) 1 (100%) 19 (100%) 0 (0%) 6 (100%) 0 (0%) 6 (46.2%) 50 (62.5%)
1 5 (23.8%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 12 (100%) 7 (53.8%) 30 (37.5%)
aip_3
0 21 (100%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 6 (100%) 12 (100%) 8 (61.5%) 75 (93.8%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (38.5%) 5 (6.3%)
aip_4
0 21 (100%) 1 (100%) 0 (0%) 1 (100%) 19 (100%) 5 (100%) 6 (100%) 12 (100%) 12 (92.3%) 77 (96.3%)
1 0 (0%) 0 (0%) 2 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (7.7%) 3 (3.8%)
sactipeptides
Mean (SD) 1.00 (0) 1.00 (NA) 1.00 (0) 1.00 (NA) 1.00 (0) 1.00 (0) 1.00 (0) 1.00 (0) 1.00 (0) 1.00 (0)
Median [Min, Max] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00]
subtilosin_a
1 13 (61.9%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 13 (16.3%)
0 8 (38.1%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 6 (100%) 12 (100%) 13 (100%) 67 (83.8%)
putative_bacteriocin_193
0 21 (100%) 1 (100%) 2 (100%) 1 (100%) 19 (100%) 5 (100%) 2 (33.3%) 12 (100%) 13 (100%) 76 (95.0%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4 (66.7%) 0 (0%) 0 (0%) 4 (5.0%)
putative_bacteriocin_194
0 21 (100%) 1 (100%) 2 (100%) 0 (0%) 19 (100%) 5 (100%) 6 (100%) 12 (100%) 6 (46.2%) 72 (90.0%)
1 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 7 (53.8%) 8 (10.0%)
# AIP1

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_1)
##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes            5 16
##   Staphylococcus haemolyticus           0 19
##   Staphylococcus sciuri                 3  3
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 18  0
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_1))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_1)
## p-value = 1.063e-15
## alternative hypothesis: two.sided
# AIP2

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_2)
##                                       
##                                         0  1
##   Other                                 3  1
##   Staphylococcus chromogenes           16  5
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus               0 12
##   Staphylococcus xylosus/pseudoxylosus  6 12
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_2))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_2)
## p-value = 2.758e-10
## alternative hypothesis: two.sided
# AIP3

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_3)
##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 13  5
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_3))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_3)
## p-value = 0.007501
## alternative hypothesis: two.sided
# AIP4

table(metadata_bagel$wgs_species_2,metadata_bagel$aip_4)
##                                       
##                                         0  1
##   Other                                 2  2
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 17  1
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$aip_4))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$aip_4)
## p-value = 0.003651
## alternative hypothesis: two.sided
# Subtilosin A

table(metadata_bagel$wgs_species_2,metadata_bagel$subtilosin_a)
##                                       
##                                         1  0
##   Other                                 0  4
##   Staphylococcus chromogenes           13  8
##   Staphylococcus haemolyticus           0 19
##   Staphylococcus sciuri                 0  6
##   Staphylococcus succinus               0 12
##   Staphylococcus xylosus/pseudoxylosus  0 18
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$subtilosin_a))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$subtilosin_a)
## p-value = 4.126e-08
## alternative hypothesis: two.sided
# Putative bacteriocin 193

table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_193)
##                                       
##                                         0  1
##   Other                                 4  0
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 2  4
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 18  0
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_193))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$putative_bacteriocin_193)
## p-value = 1.012e-05
## alternative hypothesis: two.sided
# Putative bacteriocin 194

table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_194)
##                                       
##                                         0  1
##   Other                                 3  1
##   Staphylococcus chromogenes           21  0
##   Staphylococcus haemolyticus          19  0
##   Staphylococcus sciuri                 6  0
##   Staphylococcus succinus              12  0
##   Staphylococcus xylosus/pseudoxylosus 11  7
fisher.test(table(metadata_bagel$wgs_species_2,metadata_bagel$putative_bacteriocin_194))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(metadata_bagel$wgs_species_2, metadata_bagel$putative_bacteriocin_194)
## p-value = 0.0002158
## alternative hypothesis: two.sided

2.1 SCH

# --- Inhibitory activity SCH ----

# AIP1

table1(~aip_1|case_control,data=mb_chromogenes)
Control
(N=12)
Case
(N=9)
Overall
(N=21)
aip_1
0 1 (8.3%) 4 (44.4%) 5 (23.8%)
1 11 (91.7%) 5 (55.6%) 16 (76.2%)
model_1 <- glm(aip_1 ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_1)
## 
## Call:
## glm(formula = aip_1 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         2.398      1.044   2.296   0.0217 *
## case_controlCase   -2.175      1.241  -1.752   0.0798 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 23.053  on 20  degrees of freedom
## Residual deviance: 19.249  on 19  degrees of freedom
## AIC: 23.249
## 
## Number of Fisher Scoring iterations: 5
exp(Confint(model_1))
##                    Estimate       2.5 %     97.5 %
## (Intercept)      11.0000000 2.140159987 201.083018
## case_controlCase  0.1136364 0.005045515   1.010237
effectsize::oddsratio_to_riskratio(model_1)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.92) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.92 |             
## case control [Case] |       0.61 | [0.06, 1.00]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_1,~case_control,type="response")
##  case_control  prob     SE  df asymp.LCL asymp.UCL
##  Control      0.917 0.0798 Inf     0.587     0.988
##  Case         0.556 0.1656 Inf     0.251     0.823
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# AIP2

table1(~aip_2|case_control,data=mb_chromogenes)
Control
(N=12)
Case
(N=9)
Overall
(N=21)
aip_2
0 11 (91.7%) 5 (55.6%) 16 (76.2%)
1 1 (8.3%) 4 (44.4%) 5 (23.8%)
model_2 <- glm(aip_2 ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_2)
## 
## Call:
## glm(formula = aip_2 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)        -2.398      1.044  -2.296   0.0217 *
## case_controlCase    2.175      1.241   1.752   0.0798 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 23.053  on 20  degrees of freedom
## Residual deviance: 19.249  on 19  degrees of freedom
## AIC: 23.249
## 
## Number of Fisher Scoring iterations: 5
exp(Confint(model_2))
##                    Estimate      2.5 %      97.5 %
## (Intercept)      0.09090909 0.00497307   0.4672548
## case_controlCase 8.80000000 0.98986680 198.1958191
effectsize::oddsratio_to_riskratio(model_2)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.08) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |        95% CI
## ------------------------------------------------
## (Intercept)         |       0.08 |              
## case control [Case] |       5.33 | [0.99, 11.37]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_2,~case_control,type="response")
##  case_control   prob     SE  df asymp.LCL asymp.UCL
##  Control      0.0833 0.0798 Inf    0.0116     0.413
##  Case         0.4444 0.1656 Inf    0.1768     0.749
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Subtilosin A

table1(~subtilosin_a|case_control,data=mb_chromogenes)
Control
(N=12)
Case
(N=9)
Overall
(N=21)
subtilosin_a
1 7 (58.3%) 6 (66.7%) 13 (61.9%)
0 5 (41.7%) 3 (33.3%) 8 (38.1%)
model_3 <- glm(subtilosin_a ~ case_control,family=binomial(link="logit"),data=mb_chromogenes)
summary(model_3)
## 
## Call:
## glm(formula = subtilosin_a ~ case_control, family = binomial(link = "logit"), 
##     data = mb_chromogenes)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -0.3365     0.5855  -0.575    0.566
## case_controlCase  -0.3567     0.9181  -0.389    0.698
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 27.910  on 20  degrees of freedom
## Residual deviance: 27.758  on 19  degrees of freedom
## AIC: 31.758
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_3))
##                   Estimate     2.5 %   97.5 %
## (Intercept)      0.7142857 0.2113989 2.237832
## case_controlCase 0.7000000 0.1053801 4.183982
effectsize::oddsratio_to_riskratio(model_3)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.42) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.42 |             
## case control [Case] |       0.80 | [0.17, 1.80]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_3,~case_control,type="response")
##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.417 0.142 Inf     0.185     0.692
##  Case         0.333 0.157 Inf     0.111     0.667
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Association between AMPs and MIC in SCH

# AIP1

model_1 <- lm(mic_sau ~ aip_1,data=mb_chromogenes)
summary(model_1)
## 
## Call:
## lm(formula = mic_sau ~ aip_1, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7647 -0.3097 -0.0300  0.2353  1.1553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.1900     0.2533  12.594 4.79e-10 ***
## aip_11        0.1647     0.2851   0.578    0.571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5066 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01925,    Adjusted R-squared:  -0.03844 
## F-statistic: 0.3337 on 1 and 17 DF,  p-value: 0.5711
model_1 <- lm(mic_sub ~ aip_1,data=mb_chromogenes)
summary(model_1)
## 
## Call:
## lm(formula = mic_sub ~ aip_1, data = mb_chromogenes)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.578 -0.383 -0.088  0.206  2.092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9400     0.3093   9.507 3.23e-08 ***
## aip_11        0.1480     0.3481   0.425    0.676    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6185 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01052,    Adjusted R-squared:  -0.04768 
## F-statistic: 0.1808 on 1 and 17 DF,  p-value: 0.676
# AIP2

model_2 <- lm(mic_sau ~ aip_2,data=mb_chromogenes)
summary(model_2)
## 
## Call:
## lm(formula = mic_sau ~ aip_2, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7647 -0.3097 -0.0300  0.2353  1.1553 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.3547     0.1308  25.647 4.97e-15 ***
## aip_21       -0.1647     0.2851  -0.578    0.571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5066 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01925,    Adjusted R-squared:  -0.03844 
## F-statistic: 0.3337 on 1 and 17 DF,  p-value: 0.5711
model_2 <- lm(mic_sub ~ aip_2,data=mb_chromogenes)
summary(model_2)
## 
## Call:
## lm(formula = mic_sub ~ aip_2, data = mb_chromogenes)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.578 -0.383 -0.088  0.206  2.092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0880     0.1597  19.336 5.19e-13 ***
## aip_21       -0.1480     0.3481  -0.425    0.676    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6185 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01052,    Adjusted R-squared:  -0.04768 
## F-statistic: 0.1808 on 1 and 17 DF,  p-value: 0.676
# Subtilosin A

model_3 <- lm(mic_sau ~ subtilosin_a,data=mb_chromogenes)
summary(model_3)
## 
## Call:
## lm(formula = mic_sau ~ subtilosin_a, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84273 -0.33886  0.07727  0.20614  1.07727 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.4327     0.1484  23.137 2.73e-14 ***
## subtilosin_a0  -0.2677     0.2286  -1.171    0.258    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4921 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.07463,    Adjusted R-squared:  0.0202 
## F-statistic: 1.371 on 1 and 17 DF,  p-value: 0.2578
model_3 <- lm(mic_sub ~ subtilosin_a,data=mb_chromogenes)
summary(model_3)
## 
## Call:
## lm(formula = mic_sub ~ subtilosin_a, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55909 -0.41455 -0.06909  0.18545  2.11091 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.06909    0.18742  16.375 7.64e-12 ***
## subtilosin_a0 -0.02909    0.28884  -0.101    0.921    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6216 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0005964,  Adjusted R-squared:  -0.05819 
## F-statistic: 0.01014 on 1 and 17 DF,  p-value: 0.921
# Clades within this group


# List of target isolates

target_isolates <- c("SCH1", "SCH3", "SCH5", "SCH8", "SCH15", "SCH16", "SCH17", "SCH18", "SCH20")

# Create a new column 'presence' based on target isolates
mb_chromogenes <- mb_chromogenes %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


# List of target isolates
target_isolates_2 <- c("SCH2", "SCH11", "SCH21", "SCH9")

# Create a new column 'presence' based on target isolates
mb_chromogenes <- mb_chromogenes %>%
  mutate(clade_2 = ifelse(wgs_isolate_id %in% target_isolates_2, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+subtilosin_a|clade,data=subset(mb_chromogenes,mb_chromogenes$wgs_species_3!="SAU"))
Absent
(N=12)
Present
(N=9)
Overall
(N=21)
mic_sau
Mean (SD) 3.30 (0.409) 3.35 (0.629) 3.32 (0.497)
Median [Min, Max] 3.25 [2.74, 4.18] 3.37 [2.59, 4.51] 3.25 [2.59, 4.51]
Missing 1 (8.3%) 1 (11.1%) 2 (9.5%)
mic_sub
Mean (SD) 3.21 (0.712) 2.85 (0.362) 3.06 (0.604)
Median [Min, Max] 3.16 [2.61, 5.18] 2.71 [2.51, 3.51] 3.00 [2.51, 5.18]
Missing 1 (8.3%) 1 (11.1%) 2 (9.5%)
sau_inhibition_category
10 to 20 2 (16.7%) 0 (0%) 2 (9.5%)
20 to 30 1 (8.3%) 1 (11.1%) 2 (9.5%)
30 to 40 5 (41.7%) 1 (11.1%) 6 (28.6%)
40 to 50 2 (16.7%) 2 (22.2%) 4 (19.0%)
60 to 70 1 (8.3%) 0 (0%) 1 (4.8%)
>70 0 (0%) 1 (11.1%) 1 (4.8%)
50 to 60 0 (0%) 1 (11.1%) 1 (4.8%)
Top 10 0 (0%) 2 (22.2%) 2 (9.5%)
Missing 1 (8.3%) 1 (11.1%) 2 (9.5%)
sub_inhibition_category
>70 1 (8.3%) 0 (0%) 1 (4.8%)
10 to 20 3 (25.0%) 2 (22.2%) 5 (23.8%)
20 to 30 1 (8.3%) 2 (22.2%) 3 (14.3%)
30 to 40 4 (33.3%) 1 (11.1%) 5 (23.8%)
40 to 50 2 (16.7%) 1 (11.1%) 3 (14.3%)
Top 10 0 (0%) 2 (22.2%) 2 (9.5%)
Missing 1 (8.3%) 1 (11.1%) 2 (9.5%)
subtilosin_a
1 4 (33.3%) 9 (100%) 13 (61.9%)
0 8 (66.7%) 0 (0%) 8 (38.1%)
# Isolates inside and outside clade

model <- lm(mic_sau~clade,data=mb_chromogenes)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76125 -0.27926 -0.04727  0.26074  1.15875 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.29727    0.15400  21.411  9.8e-14 ***
## cladePresent  0.05398    0.23733   0.227    0.823    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5108 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.003034,   Adjusted R-squared:  -0.05561 
## F-statistic: 0.05173 on 1 and 17 DF,  p-value: 0.8228
Confint(model)
##                Estimate      2.5 %    97.5 %
## (Intercept)  3.29727273  2.9723606 3.6221848
## cladePresent 0.05397727 -0.4467459 0.5547004
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.30 0.154 17     2.97     3.62
##  Present   3.35 0.181 17     2.97     3.73
## 
## Confidence level used: 0.95
model <- lm(mic_sub~clade,data=mb_chromogenes)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_chromogenes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5964 -0.3162 -0.1013  0.1112  1.9736 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.2064     0.1790  17.917 1.79e-12 ***
## cladePresent  -0.3551     0.2758  -1.288    0.215    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5935 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.08886,    Adjusted R-squared:  0.03527 
## F-statistic: 1.658 on 1 and 17 DF,  p-value: 0.2151
Confint(model)
##                Estimate      2.5 %    97.5 %
## (Intercept)   3.2063636  2.8288051 3.5839221
## cladePresent -0.3551136 -0.9369704 0.2267431
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.21 0.179 17     2.83     3.58
##  Present   2.85 0.210 17     2.41     3.29
## 
## Confidence level used: 0.95
# AIP1

table1(~aip_1|clade,data=mb_chromogenes)
Absent
(N=12)
Present
(N=9)
Overall
(N=21)
aip_1
0 5 (41.7%) 0 (0%) 5 (23.8%)
1 7 (58.3%) 9 (100%) 16 (76.2%)
fisher.test(table(mb_chromogenes$aip_1,mb_chromogenes$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_1, mb_chromogenes$clade)
## p-value = 0.04511
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.8206881       Inf
## sample estimates:
## odds ratio 
##        Inf
# AIP2

table1(~aip_2|clade,data=mb_chromogenes)
Absent
(N=12)
Present
(N=9)
Overall
(N=21)
aip_2
0 7 (58.3%) 9 (100%) 16 (76.2%)
1 5 (41.7%) 0 (0%) 5 (23.8%)
fisher.test(table(mb_chromogenes$aip_2,mb_chromogenes$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_2, mb_chromogenes$clade)
## p-value = 0.04511
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.00000 1.21849
## sample estimates:
## odds ratio 
##          0
# Subtilosin A

table1(~subtilosin_a|clade,data=mb_chromogenes)
Absent
(N=12)
Present
(N=9)
Overall
(N=21)
subtilosin_a
1 4 (33.3%) 9 (100%) 13 (61.9%)
0 8 (66.7%) 0 (0%) 8 (38.1%)
fisher.test(table(mb_chromogenes$subtilosin_a,mb_chromogenes$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$subtilosin_a, mb_chromogenes$clade)
## p-value = 0.0046
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0000000 0.4517167
## sample estimates:
## odds ratio 
##          0
# Isolates inside and outside clade 2

model <- lm(mic_sau~clade_2,data=mb_chromogenes)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade_2, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75812 -0.30312 -0.06812  0.24188  1.16188 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.3481     0.1267  26.420 3.03e-15 ***
## clade_2Present  -0.1781     0.3189  -0.559    0.584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5069 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01802,    Adjusted R-squared:  -0.03974 
## F-statistic: 0.3119 on 1 and 17 DF,  p-value: 0.5838
Confint(model)
##                 Estimate      2.5 %    97.5 %
## (Intercept)     3.348125  3.0807545 3.6154955
## clade_2Present -0.178125 -0.8509926 0.4947426
emmeans(model,~clade_2)
##  clade_2 emmean    SE df lower.CL upper.CL
##  Absent    3.35 0.127 17     3.08     3.62
##  Present   3.17 0.293 17     2.55     3.79
## 
## Confidence level used: 0.95
model <- lm(mic_sub~clade_2,data=mb_chromogenes)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade_2, data = mb_chromogenes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58812 -0.39312 -0.09667  0.16688  2.08188 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.0981     0.1534  20.197 2.55e-13 ***
## clade_2Present  -0.2615     0.3860  -0.677    0.507    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6136 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.02628,    Adjusted R-squared:  -0.031 
## F-statistic: 0.4587 on 1 and 17 DF,  p-value: 0.5073
Confint(model)
##                  Estimate     2.5 %    97.5 %
## (Intercept)     3.0981250  2.774496 3.4217537
## clade_2Present -0.2614583 -1.075906 0.5529893
emmeans(model,~clade_2)
##  clade_2 emmean    SE df lower.CL upper.CL
##  Absent    3.10 0.153 17     2.77     3.42
##  Present   2.84 0.354 17     2.09     3.58
## 
## Confidence level used: 0.95
# AIP1

table1(~aip_1|clade_2,data=mb_chromogenes)
Absent
(N=17)
Present
(N=4)
Overall
(N=21)
aip_1
0 1 (5.9%) 4 (100%) 5 (23.8%)
1 16 (94.1%) 0 (0%) 16 (76.2%)
fisher.test(table(mb_chromogenes$aip_1,mb_chromogenes$clade_2))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_1, mb_chromogenes$clade_2)
## p-value = 0.0008354
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0000000 0.2697048
## sample estimates:
## odds ratio 
##          0
# AIP2

table1(~aip_2|clade_2,data=mb_chromogenes)
Absent
(N=17)
Present
(N=4)
Overall
(N=21)
aip_2
0 16 (94.1%) 0 (0%) 16 (76.2%)
1 1 (5.9%) 4 (100%) 5 (23.8%)
fisher.test(table(mb_chromogenes$aip_2,mb_chromogenes$clade_2))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$aip_2, mb_chromogenes$clade_2)
## p-value = 0.0008354
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  3.707758      Inf
## sample estimates:
## odds ratio 
##        Inf
# Subtilosin A

table1(~subtilosin_a|clade_2,data=mb_chromogenes)
Absent
(N=17)
Present
(N=4)
Overall
(N=21)
subtilosin_a
1 10 (58.8%) 3 (75.0%) 13 (61.9%)
0 7 (41.2%) 1 (25.0%) 8 (38.1%)
fisher.test(table(mb_chromogenes$subtilosin_a,mb_chromogenes$clade_2))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_chromogenes$subtilosin_a, mb_chromogenes$clade_2)
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.007958673 7.734743347
## sample estimates:
## odds ratio 
##  0.4921903

2.2 SHAEM

# --- AMPs in SHAEM----

# none of the AMPs were both present and absent in at least 2 isolates

# Inhibitory activity was absent in most isolates

2.3 SSC

# ---- AMPs in SSC ----

# AIP1

table1(~aip_1|case_control,data=mb_sciuri)
Control
(N=3)
Case
(N=3)
Overall
(N=6)
aip_1
0 1 (33.3%) 2 (66.7%) 3 (50.0%)
1 2 (66.7%) 1 (33.3%) 3 (50.0%)
model_1 <- glm(aip_1 ~ case_control,family=binomial(link="logit"),data=mb_sciuri)
summary(model_1)
## 
## Call:
## glm(formula = aip_1 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_sciuri)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        0.6931     1.2247   0.566    0.571
## case_controlCase  -1.3863     1.7321  -0.800    0.423
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8.3178  on 5  degrees of freedom
## Residual deviance: 7.6382  on 4  degrees of freedom
## AIC: 11.638
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_1))
##                  Estimate       2.5 %    97.5 %
## (Intercept)          2.00 0.191564939 43.037669
## case_controlCase     0.25 0.004803826  6.562236
effectsize::oddsratio_to_riskratio(model_1)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.67) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.67 |             
## case control [Case] |       0.50 | [0.01, 1.39]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_1,~case_control,type="response")
##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.667 0.272 Inf    0.1535     0.957
##  Case         0.333 0.272 Inf    0.0434     0.846
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Putative bacteriocin 193

table1(~putative_bacteriocin_193|case_control,data=mb_sciuri)
Control
(N=3)
Case
(N=3)
Overall
(N=6)
putative_bacteriocin_193
0 1 (33.3%) 1 (33.3%) 2 (33.3%)
1 2 (66.7%) 2 (66.7%) 4 (66.7%)
model_2 <- glm(putative_bacteriocin_193 ~ case_control,family=binomial(link="logit"),data=mb_sciuri)
summary(model_2)
## 
## Call:
## glm(formula = putative_bacteriocin_193 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_sciuri)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)      6.931e-01  1.225e+00   0.566    0.571
## case_controlCase 2.459e-16  1.732e+00   0.000    1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7.6382  on 5  degrees of freedom
## Residual deviance: 7.6382  on 4  degrees of freedom
## AIC: 11.638
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_2))
##                  Estimate      2.5 %   97.5 %
## (Intercept)             2 0.19156494 43.03767
## case_controlCase        1 0.02475536 40.39530
effectsize::oddsratio_to_riskratio(model_2)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.67) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.67 |             
## case control [Case] |       1.00 | [0.07, 1.48]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_2,~case_control,type="response")
##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.667 0.272 Inf     0.154     0.957
##  Case         0.667 0.272 Inf     0.154     0.957
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Association between AMPs and MIC in SSCI

# AIP1

model_1 <- lm(mic_sau ~ aip_1,data=mb_sciuri)
summary(model_1)
## 
## Call:
## lm(formula = mic_sau ~ aip_1, data = mb_sciuri)
## 
## Residuals:
##     1     2     3     4     5     6 
## -0.09  0.10  0.92 -0.41 -0.01 -0.51 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.5400     0.3282  10.785 0.000419 ***
## aip_11       -0.4300     0.4642  -0.926 0.406692    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5685 on 4 degrees of freedom
## Multiple R-squared:  0.1766, Adjusted R-squared:  -0.0292 
## F-statistic: 0.8581 on 1 and 4 DF,  p-value: 0.4067
model_1 <- lm(mic_sub ~ aip_1,data=mb_sciuri)
summary(model_1)
## 
## Call:
## lm(formula = mic_sub ~ aip_1, data = mb_sciuri)
## 
## Residuals:
##       1       2       3       4       5       6 
## -0.0900  0.1000  0.5867  0.2567 -0.0100 -0.8433 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.54000    0.30815  11.488 0.000328 ***
## aip_11      -0.09667    0.43579  -0.222 0.835318    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5337 on 4 degrees of freedom
## Multiple R-squared:  0.01215,    Adjusted R-squared:  -0.2348 
## F-statistic: 0.0492 on 1 and 4 DF,  p-value: 0.8353
# putative bacteriocin 193

model_2 <- lm(mic_sau ~ putative_bacteriocin_193,data=mb_sciuri)
summary(model_2)
## 
## Call:
## lm(formula = mic_sau ~ putative_bacteriocin_193, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.665 -0.665  0.225 -0.705 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                 3.3650     0.4423   7.607   0.0016 **
## putative_bacteriocin_1931  -0.0600     0.5418  -0.111   0.9171   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6256 on 4 degrees of freedom
## Multiple R-squared:  0.003057,   Adjusted R-squared:  -0.2462 
## F-statistic: 0.01227 on 1 and 4 DF,  p-value: 0.9171
model_2 <- lm(mic_sub ~ putative_bacteriocin_193,data=mb_sciuri)
summary(model_2)
## 
## Call:
## lm(formula = mic_sub ~ putative_bacteriocin_193, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.165 -0.165  0.225 -0.705 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.8650     0.3032  12.748 0.000218 ***
## putative_bacteriocin_1931  -0.5600     0.3713  -1.508 0.206010    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4288 on 4 degrees of freedom
## Multiple R-squared:  0.3625, Adjusted R-squared:  0.2031 
## F-statistic: 2.274 on 1 and 4 DF,  p-value: 0.206
# clades

# List of target isolates
target_isolates <- c("SSCI1", "SSCI2", "SSCI5", "SSCI6")


# Create a new column 'presence' based on target isolates
mb_sciuri <- mb_sciuri %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|clade,data=mb_sciuri)
Absent
(N=2)
Present
(N=4)
Overall
(N=6)
mic_sau
Mean (SD) 3.37 (0.940) 3.31 (0.476) 3.33 (0.560)
Median [Min, Max] 3.37 [2.70, 4.03] 3.49 [2.60, 3.64] 3.49 [2.60, 4.03]
mic_sub
Mean (SD) 3.87 (0.233) 3.31 (0.476) 3.49 (0.480)
Median [Min, Max] 3.87 [3.70, 4.03] 3.49 [2.60, 3.64] 3.59 [2.60, 4.03]
sau_inhibition_category
10 to 20 1 (50.0%) 0 (0%) 1 (16.7%)
50 to 60 1 (50.0%) 0 (0%) 1 (16.7%)
30 to 40 0 (0%) 1 (25.0%) 1 (16.7%)
40 to 50 0 (0%) 2 (50.0%) 2 (33.3%)
Top 10 0 (0%) 1 (25.0%) 1 (16.7%)
sub_inhibition_category
50 to 60 2 (100%) 0 (0%) 2 (33.3%)
10 to 20 0 (0%) 1 (25.0%) 1 (16.7%)
40 to 50 0 (0%) 3 (75.0%) 3 (50.0%)
case_control
Control 1 (50.0%) 2 (50.0%) 3 (50.0%)
Case 1 (50.0%) 2 (50.0%) 3 (50.0%)
aip_1
0 0 (0%) 3 (75.0%) 3 (50.0%)
1 2 (100%) 1 (25.0%) 3 (50.0%)
aip_2
0 2 (100%) 4 (100%) 6 (100%)
1 0 (0%) 0 (0%) 0 (0%)
aip_3
0 2 (100%) 4 (100%) 6 (100%)
1 0 (0%) 0 (0%) 0 (0%)
aip_4
0 2 (100%) 4 (100%) 6 (100%)
1 0 (0%) 0 (0%) 0 (0%)
sactipeptides
Mean (SD) 1.00 (0) 1.00 (0) 1.00 (0)
Median [Min, Max] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00]
subtilosin_a
1 0 (0%) 0 (0%) 0 (0%)
0 2 (100%) 4 (100%) 6 (100%)
putative_bacteriocin_193
0 2 (100%) 0 (0%) 2 (33.3%)
1 0 (0%) 4 (100%) 4 (66.7%)
putative_bacteriocin_194
0 2 (100%) 4 (100%) 6 (100%)
1 0 (0%) 0 (0%) 0 (0%)
# MIC across different clades

model <- lm(mic_sau~clade,data=mb_sciuri)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.665 -0.665  0.225 -0.705 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    3.3650     0.4423   7.607   0.0016 **
## cladePresent  -0.0600     0.5418  -0.111   0.9171   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6256 on 4 degrees of freedom
## Multiple R-squared:  0.003057,   Adjusted R-squared:  -0.2462 
## F-statistic: 0.01227 on 1 and 4 DF,  p-value: 0.9171
Confint(model)
##              Estimate     2.5 %   97.5 %
## (Intercept)     3.365  2.136854 4.593146
## cladePresent   -0.060 -1.564165 1.444165
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.37 0.442  4     2.14     4.59
##  Present   3.31 0.313  4     2.44     4.17
## 
## Confidence level used: 0.95
model <- lm(mic_sub~clade,data=mb_sciuri)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_sciuri)
## 
## Residuals:
##      1      2      3      4      5      6 
##  0.145  0.335  0.165 -0.165  0.225 -0.705 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.8650     0.3032  12.748 0.000218 ***
## cladePresent  -0.5600     0.3713  -1.508 0.206010    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4288 on 4 degrees of freedom
## Multiple R-squared:  0.3625, Adjusted R-squared:  0.2031 
## F-statistic: 2.274 on 1 and 4 DF,  p-value: 0.206
Confint(model)
##              Estimate     2.5 %   97.5 %
## (Intercept)     3.865  3.023234 4.706766
## cladePresent   -0.560 -1.590948 0.470948
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.87 0.303  4     3.02     4.71
##  Present   3.31 0.214  4     2.71     3.90
## 
## Confidence level used: 0.95
# AIP1

table1(~aip_1|clade,data=mb_sciuri)
Absent
(N=2)
Present
(N=4)
Overall
(N=6)
aip_1
0 0 (0%) 3 (75.0%) 3 (50.0%)
1 2 (100%) 1 (25.0%) 3 (50.0%)
fisher.test(table(mb_sciuri$aip_1,mb_sciuri$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_sciuri$aip_1, mb_sciuri$clade)
## p-value = 0.4
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.000000 4.922984
## sample estimates:
## odds ratio 
##          0
# putative bacteriocin 193

table1(~putative_bacteriocin_193|clade,data=mb_sciuri)
Absent
(N=2)
Present
(N=4)
Overall
(N=6)
putative_bacteriocin_193
0 2 (100%) 0 (0%) 2 (33.3%)
1 0 (0%) 4 (100%) 4 (66.7%)
fisher.test(table(mb_sciuri$putative_bacteriocin_193,mb_sciuri$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_sciuri$putative_bacteriocin_193, mb_sciuri$clade)
## p-value = 0.06667
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.5079839       Inf
## sample estimates:
## odds ratio 
##        Inf

2.4 SSUC

# ---- AMPs in SSUC ----

# none of the AMPs were both present and absent in at least 2 isolates

# Clades

# List of target isolates
target_isolates <- c("SSUC1", "SSUC2", "SSUC3", "SSUC4", "SSUC5", "SSUC10", "SSUC12")


# Create a new column 'presence' based on target isolates
mb_succinus <- mb_succinus %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present", "Absent"))


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+subtilosin_a|clade,data=subset(mb_succinus,mb_succinus$wgs_species_3!="SAU"))
Absent
(N=5)
Present
(N=7)
Overall
(N=12)
mic_sau
Mean (SD) 3.50 (0.443) 2.44 (0.430) 2.92 (0.692)
Median [Min, Max] 3.58 [3.03, 4.11] 2.40 [1.92, 3.14] 3.03 [1.92, 4.11]
Missing 0 (0%) 1 (14.3%) 1 (8.3%)
mic_sub
Mean (SD) 3.50 (0.649) 2.44 (0.884) 2.92 (0.931)
Median [Min, Max] 3.68 [2.58, 4.11] 2.21 [1.65, 4.14] 2.58 [1.65, 4.14]
Missing 0 (0%) 1 (14.3%) 1 (8.3%)
sau_inhibition_category
20 to 30 2 (40.0%) 1 (14.3%) 3 (25.0%)
40 to 50 2 (40.0%) 0 (0%) 2 (16.7%)
60 to 70 1 (20.0%) 0 (0%) 1 (8.3%)
Top 10 0 (0%) 5 (71.4%) 5 (41.7%)
Missing 0 (0%) 1 (14.3%) 1 (8.3%)
sub_inhibition_category
10 to 20 1 (20.0%) 0 (0%) 1 (8.3%)
30 to 40 1 (20.0%) 0 (0%) 1 (8.3%)
40 to 50 1 (20.0%) 0 (0%) 1 (8.3%)
50 to 60 1 (20.0%) 0 (0%) 1 (8.3%)
60 to 70 1 (20.0%) 1 (14.3%) 2 (16.7%)
Top 10 0 (0%) 5 (71.4%) 5 (41.7%)
Missing 0 (0%) 1 (14.3%) 1 (8.3%)
subtilosin_a
1 0 (0%) 0 (0%) 0 (0%)
0 5 (100%) 7 (100%) 12 (100%)
# MIC across different clades

model <- lm(mic_sau~clade,data=mb_succinus)
summary(model)
## 
## Call:
## lm(formula = mic_sau ~ clade, data = mb_succinus)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.520 -0.346  0.070  0.194  0.700 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.5020     0.1949  17.971 2.33e-08 ***
## cladePresent  -1.0620     0.2639  -4.025    0.003 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4357 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6429, Adjusted R-squared:  0.6032 
## F-statistic:  16.2 on 1 and 9 DF,  p-value: 0.002996
Confint(model)
##              Estimate     2.5 %     97.5 %
## (Intercept)     3.502  3.061169  3.9428312
## cladePresent   -1.062 -1.658888 -0.4651118
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.50 0.195  9     3.06     3.94
##  Present   2.44 0.178  9     2.04     2.84
## 
## Confidence level used: 0.95
model <- lm(mic_sub~clade,data=mb_succinus)
summary(model)
## 
## Call:
## lm(formula = mic_sub ~ clade, data = mb_succinus)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.922 -0.456 -0.160  0.353  1.700 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.5020     0.3524   9.937 3.77e-06 ***
## cladePresent  -1.0620     0.4772  -2.226   0.0531 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.788 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.355,  Adjusted R-squared:  0.2833 
## F-statistic: 4.953 on 1 and 9 DF,  p-value: 0.05308
Confint(model)
##              Estimate     2.5 %     97.5 %
## (Intercept)     3.502  2.704779 4.29922133
## cladePresent   -1.062 -2.141443 0.01744278
emmeans(model,~clade)
##  clade   emmean    SE df lower.CL upper.CL
##  Absent    3.50 0.352  9     2.70     4.30
##  Present   2.44 0.322  9     1.71     3.17
## 
## Confidence level used: 0.95

2.5 SXYL & SPXYL

# ---- AMPs in SXYL/PXYL ----

# AIP2

table1(~aip_2|case_control,data=mb_xylosus)
Control
(N=8)
Case
(N=10)
Overall
(N=18)
aip_2
0 2 (25.0%) 4 (40.0%) 6 (33.3%)
1 6 (75.0%) 6 (60.0%) 12 (66.7%)
model_1 <- glm(aip_2 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_1)
## 
## Call:
## glm(formula = aip_2 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        1.0986     0.8165   1.346    0.178
## case_controlCase  -0.6931     1.0408  -0.666    0.505
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 22.915  on 17  degrees of freedom
## Residual deviance: 22.458  on 16  degrees of freedom
## AIC: 26.458
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_1))
##                  Estimate      2.5 %    97.5 %
## (Intercept)           3.0 0.69120194 20.475683
## case_controlCase      0.5 0.05327731  3.663896
effectsize::oddsratio_to_riskratio(model_1)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.75) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.75 |             
## case control [Case] |       0.80 | [0.18, 1.22]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_1,~case_control,type="response")
##  case_control prob    SE  df asymp.LCL asymp.UCL
##  Control      0.75 0.153 Inf     0.377     0.937
##  Case         0.60 0.155 Inf     0.297     0.842
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# AIP3

table1(~aip_3|case_control,data=mb_xylosus)
Control
(N=8)
Case
(N=10)
Overall
(N=18)
aip_3
0 6 (75.0%) 7 (70.0%) 13 (72.2%)
1 2 (25.0%) 3 (30.0%) 5 (27.8%)
model_2 <- glm(aip_3 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_2)
## 
## Call:
## glm(formula = aip_3 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -1.0986     0.8165  -1.346    0.178
## case_controlCase   0.2513     1.0690   0.235    0.814
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 21.270  on 17  degrees of freedom
## Residual deviance: 21.215  on 16  degrees of freedom
## AIC: 25.215
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_2))
##                   Estimate      2.5 %    97.5 %
## (Intercept)      0.3333333 0.04883842  1.446755
## case_controlCase 1.2857143 0.15782429 12.403957
effectsize::oddsratio_to_riskratio(model_2)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.25) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.25 |             
## case control [Case] |       1.20 | [0.20, 3.22]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_2,~case_control,type="response")
##  case_control prob    SE  df asymp.LCL asymp.UCL
##  Control      0.25 0.153 Inf    0.0630     0.623
##  Case         0.30 0.145 Inf    0.0998     0.624
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# putative bacteriocin 194

table1(~putative_bacteriocin_194|case_control,data=mb_xylosus)
Control
(N=8)
Case
(N=10)
Overall
(N=18)
putative_bacteriocin_194
0 5 (62.5%) 6 (60.0%) 11 (61.1%)
1 3 (37.5%) 4 (40.0%) 7 (38.9%)
model_3 <- glm(putative_bacteriocin_194 ~ case_control,family=binomial(link="logit"),data=mb_xylosus)
summary(model_3)
## 
## Call:
## glm(formula = putative_bacteriocin_194 ~ case_control, family = binomial(link = "logit"), 
##     data = mb_xylosus)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -0.5108     0.7303  -0.699    0.484
## case_controlCase   0.1054     0.9747   0.108    0.914
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 24.057  on 17  degrees of freedom
## Residual deviance: 24.045  on 16  degrees of freedom
## AIC: 28.045
## 
## Number of Fisher Scoring iterations: 4
exp(Confint(model_3))
##                  Estimate     2.5 %   97.5 %
## (Intercept)      0.600000 0.1230568 2.445302
## case_controlCase 1.111111 0.1616472 8.069622
effectsize::oddsratio_to_riskratio(model_3)
## Warning: 'p0' not provided.
##   RR is relative to the intercept (p0 = 0.38) - make sure your intercept
##   is meaningful.
## CIs are back-transformed from the logit scale.
## Parameter           | Risk Ratio |       95% CI
## -----------------------------------------------
## (Intercept)         |       0.38 |             
## case control [Case] |       1.07 | [0.24, 2.21]
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
emmeans(model_3,~case_control,type="response")
##  case_control  prob    SE  df asymp.LCL asymp.UCL
##  Control      0.375 0.171 Inf     0.125     0.715
##  Case         0.400 0.155 Inf     0.158     0.703
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
# Association between AMPs and MIC in SXYL

# AIP2

model_1 <- lm(mic_sau ~ aip_2,data=mb_xylosus)
summary(model_1)
## 
## Call:
## lm(formula = mic_sau ~ aip_2, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43500 -0.25917 -0.13417  0.04437  0.94500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.1450     0.1733   18.15 4.25e-12 ***
## aip_21       -0.1358     0.2122   -0.64    0.531    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4244 on 16 degrees of freedom
## Multiple R-squared:  0.02497,    Adjusted R-squared:  -0.03597 
## F-statistic: 0.4097 on 1 and 16 DF,  p-value: 0.5312
model_1 <- lm(mic_sub ~ aip_2,data=mb_xylosus)
summary(model_1)
## 
## Call:
## lm(formula = mic_sub ~ aip_2, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26833 -0.38250 -0.06542  0.56354  1.11167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9783     0.2678  11.122 6.13e-09 ***
## aip_21        0.1142     0.3280   0.348    0.732    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6559 on 16 degrees of freedom
## Multiple R-squared:  0.007517,   Adjusted R-squared:  -0.05451 
## F-statistic: 0.1212 on 1 and 16 DF,  p-value: 0.7323
# AIP3

model_1 <- lm(mic_sau ~ aip_3,data=mb_xylosus)
summary(model_1)
## 
## Call:
## lm(formula = mic_sau ~ aip_3, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47800 -0.25308 -0.08808  0.04442  0.92692 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0031     0.1167  25.734  1.9e-14 ***
## aip_31        0.1849     0.2214   0.835    0.416    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4208 on 16 degrees of freedom
## Multiple R-squared:  0.04177,    Adjusted R-squared:  -0.01811 
## F-statistic: 0.6975 on 1 and 16 DF,  p-value: 0.4159
model_1 <- lm(mic_sub ~ aip_3,data=mb_xylosus)
summary(model_1)
## 
## Call:
## lm(formula = mic_sub ~ aip_3, data = mb_xylosus)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2780 -0.3700 -0.1100  0.5705  1.1020 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0800     0.1822  16.904 1.26e-11 ***
## aip_31       -0.0920     0.3457  -0.266    0.794    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.657 on 16 degrees of freedom
## Multiple R-squared:  0.004407,   Adjusted R-squared:  -0.05782 
## F-statistic: 0.07082 on 1 and 16 DF,  p-value: 0.7935
# putative bacteriocin 194

model_2 <- lm(mic_sau ~ putative_bacteriocin_194,data=mb_xylosus)
summary(model_2)
## 
## Call:
## lm(formula = mic_sau ~ putative_bacteriocin_194, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41091 -0.26591 -0.17403  0.06286  0.98286 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                3.02091    0.12890  23.436  8.2e-14 ***
## putative_bacteriocin_1941  0.08623    0.20670   0.417    0.682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4275 on 16 degrees of freedom
## Multiple R-squared:  0.01076,    Adjusted R-squared:  -0.05107 
## F-statistic: 0.1741 on 1 and 16 DF,  p-value: 0.6821
model_2 <- lm(mic_sub ~ putative_bacteriocin_194,data=mb_xylosus)
summary(model_2)
## 
## Call:
## lm(formula = mic_sub ~ putative_bacteriocin_194, data = mb_xylosus)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25429 -0.40182 -0.05805  0.56756  1.12571 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.1118     0.1972  15.782 3.56e-11 ***
## putative_bacteriocin_1941  -0.1475     0.3162  -0.467    0.647    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.654 on 16 degrees of freedom
## Multiple R-squared:  0.01342,    Adjusted R-squared:  -0.04824 
## F-statistic: 0.2177 on 1 and 16 DF,  p-value: 0.6471
# clades

target_isolates <- c("SXYL1", "SXYL2", "SXYL4", "SXYL10", "SXYL11", "SXYL13")

mb_xylosus <- mb_xylosus %>%
  mutate(clade = ifelse(wgs_isolate_id %in% target_isolates, "Present_xylosus", "Absent_xylosus"))


mb_xylosus$clade <- ifelse(mb_xylosus$wgs_species_3=="SXYL",mb_xylosus$clade,"Ps_xylosus")


table1(~mic_sau + mic_sub+sau_inhibition_category+sub_inhibition_category+case_control+aip_1+aip_2+aip_3+aip_4+sactipeptides+subtilosin_a+putative_bacteriocin_193+putative_bacteriocin_194|clade,data=mb_xylosus)
Absent_xylosus
(N=7)
Present_xylosus
(N=6)
Ps_xylosus
(N=5)
Overall
(N=18)
mic_sau
Mean (SD) 2.98 (0.291) 3.14 (0.505) 3.06 (0.522) 3.05 (0.417)
Median [Min, Max] 2.85 [2.74, 3.56] 3.04 [2.71, 4.09] 3.01 [2.61, 3.93] 2.92 [2.61, 4.09]
mic_sub
Mean (SD) 3.12 (0.514) 2.97 (0.915) 3.06 (0.522) 3.05 (0.639)
Median [Min, Max] 2.93 [2.56, 3.85] 3.04 [1.71, 4.09] 3.01 [2.61, 3.93] 2.97 [1.71, 4.09]
sau_inhibition_category
10 to 20 4 (57.1%) 2 (33.3%) 1 (20.0%) 7 (38.9%)
20 to 30 1 (14.3%) 1 (16.7%) 2 (40.0%) 4 (22.2%)
30 to 40 1 (14.3%) 2 (33.3%) 0 (0%) 3 (16.7%)
40 to 50 1 (14.3%) 0 (0%) 0 (0%) 1 (5.6%)
50 to 60 0 (0%) 1 (16.7%) 1 (20.0%) 2 (11.1%)
Top 10 0 (0%) 0 (0%) 1 (20.0%) 1 (5.6%)
sub_inhibition_category
10 to 20 1 (14.3%) 0 (0%) 2 (40.0%) 3 (16.7%)
20 to 30 3 (42.9%) 1 (16.7%) 2 (40.0%) 6 (33.3%)
30 to 40 1 (14.3%) 1 (16.7%) 0 (0%) 2 (11.1%)
50 to 60 2 (28.6%) 1 (16.7%) 1 (20.0%) 4 (22.2%)
60 to 70 0 (0%) 1 (16.7%) 0 (0%) 1 (5.6%)
Top 10 0 (0%) 2 (33.3%) 0 (0%) 2 (11.1%)
case_control
Control 3 (42.9%) 3 (50.0%) 2 (40.0%) 8 (44.4%)
Case 4 (57.1%) 3 (50.0%) 3 (60.0%) 10 (55.6%)
aip_1
0 7 (100%) 6 (100%) 5 (100%) 18 (100%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%)
aip_2
0 2 (28.6%) 4 (66.7%) 0 (0%) 6 (33.3%)
1 5 (71.4%) 2 (33.3%) 5 (100%) 12 (66.7%)
aip_3
0 6 (85.7%) 2 (33.3%) 5 (100%) 13 (72.2%)
1 1 (14.3%) 4 (66.7%) 0 (0%) 5 (27.8%)
aip_4
0 6 (85.7%) 6 (100%) 5 (100%) 17 (94.4%)
1 1 (14.3%) 0 (0%) 0 (0%) 1 (5.6%)
sactipeptides
Mean (SD) 1.00 (0) 1.00 (0) 1.00 (0) 1.00 (0)
Median [Min, Max] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00] 1.00 [1.00, 1.00]
subtilosin_a
1 0 (0%) 0 (0%) 0 (0%) 0 (0%)
0 7 (100%) 6 (100%) 5 (100%) 18 (100%)
putative_bacteriocin_193
0 7 (100%) 6 (100%) 5 (100%) 18 (100%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%)
putative_bacteriocin_194
0 6 (85.7%) 0 (0%) 5 (100%) 11 (61.1%)
1 1 (14.3%) 6 (100%) 0 (0%) 7 (38.9%)
# AIP2

table1(~aip_2|clade,data=mb_xylosus)
Absent_xylosus
(N=7)
Present_xylosus
(N=6)
Ps_xylosus
(N=5)
Overall
(N=18)
aip_2
0 2 (28.6%) 4 (66.7%) 0 (0%) 6 (33.3%)
1 5 (71.4%) 2 (33.3%) 5 (100%) 12 (66.7%)
fisher.test(table(mb_xylosus$aip_2,mb_xylosus$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$aip_2, mb_xylosus$clade)
## p-value = 0.07428
## alternative hypothesis: two.sided
# AIP3

table1(~aip_3|clade,data=mb_xylosus)
Absent_xylosus
(N=7)
Present_xylosus
(N=6)
Ps_xylosus
(N=5)
Overall
(N=18)
aip_3
0 6 (85.7%) 2 (33.3%) 5 (100%) 13 (72.2%)
1 1 (14.3%) 4 (66.7%) 0 (0%) 5 (27.8%)
fisher.test(table(mb_xylosus$aip_3,mb_xylosus$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$aip_3, mb_xylosus$clade)
## p-value = 0.03186
## alternative hypothesis: two.sided
# putative bacteriocin 194

table1(~putative_bacteriocin_194|clade,data=mb_xylosus)
Absent_xylosus
(N=7)
Present_xylosus
(N=6)
Ps_xylosus
(N=5)
Overall
(N=18)
putative_bacteriocin_194
0 6 (85.7%) 0 (0%) 5 (100%) 11 (61.1%)
1 1 (14.3%) 6 (100%) 0 (0%) 7 (38.9%)
fisher.test(table(mb_xylosus$putative_bacteriocin_194,mb_xylosus$clade))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(mb_xylosus$putative_bacteriocin_194, mb_xylosus$clade)
## p-value = 0.0004085
## alternative hypothesis: two.sided

3 Virulence genes

3.1 Summary

# Number of genes identified

virulence_annotated %>% group_by(vir_gene_type) %>% summarise(n=n_distinct(vir_gene_abbreviation))
## # A tibble: 6 × 2
##   vir_gene_type              n
##   <chr>                  <int>
## 1 Adherence                 37
## 2 Enterotoxins_exotoxins    87
## 3 Exoenzymes                22
## 4 Immune_evasion            34
## 5 Iron_uptake_metabolism    29
## 6 Toxin_genes               30
virulence_annotated %>% ungroup() %>% summarise(n=n_distinct(vir_gene_nonredundant))
## # A tibble: 1 × 1
##       n
##   <int>
## 1   200
virulence_annotated %>% group_by(vir_gene_type) %>% summarise(n=n_distinct(vir_gene_nonredundant))
## # A tibble: 6 × 2
##   vir_gene_type              n
##   <chr>                  <int>
## 1 Adherence                 30
## 2 Enterotoxins_exotoxins    59
## 3 Exoenzymes                18
## 4 Immune_evasion            34
## 5 Iron_uptake_metabolism    29
## 6 Toxin_genes               30
# Adherence genes 

adherence_summary <- adherence_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
adherence_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/37)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 SAU                30         81.1
##  2 SCH                19         51.4
##  3 SCON                8         21.6
##  4 SDEV               17         45.9
##  5 SEQ                14         37.8
##  6 SHAEM              21         56.8
##  7 SPXYL              17         45.9
##  8 SSC                20         54.1
##  9 SSUC               14         37.8
## 10 SXYL               22         59.5
# Exoenzymes genes 

exoenzymes_summary <- exoenzymes_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
exoenzymes_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/22)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 SAU                17         77.3
##  2 SCH                 6         27.3
##  3 SCON                7         31.8
##  4 SDEV                4         18.2
##  5 SEQ                 4         18.2
##  6 SHAEM               4         18.2
##  7 SPXYL               7         31.8
##  8 SSC                 6         27.3
##  9 SSUC                9         40.9
## 10 SXYL                8         36.4
# Immune evasion genes

im_ev_summary <- im_ev_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
im_ev_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/35)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 SAU                34         97.1
##  2 SCH                30         85.7
##  3 SCON                8         22.9
##  4 SDEV               12         34.3
##  5 SEQ                15         42.9
##  6 SHAEM              18         51.4
##  7 SPXYL               4         11.4
##  8 SSC                 8         22.9
##  9 SSUC                4         11.4
## 10 SXYL               28         80
# Iron related genes

iron_summary <- iron_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
iron_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/29)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 SAU                29        100  
##  2 SCH                14         48.3
##  3 SCON               10         34.5
##  4 SDEV               12         41.4
##  5 SEQ                20         69.0
##  6 SHAEM              16         55.2
##  7 SPXYL              11         37.9
##  8 SSC                17         58.6
##  9 SSUC               11         37.9
## 10 SXYL               10         34.5
# Enterotoxins and exotoxins genes

enterotoxin_summary <- enterotoxin_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
enterotoxin_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/87)*100)
## # A tibble: 2 × 3
##   wgs_species_3 n_genes prop_present
##   <chr>           <dbl>        <dbl>
## 1 SAU                59         67.8
## 2 SCH                36         41.4
# Toxin genes 

toxin_summary <- toxin_long %>% group_by(vir_gene_nonredundant,wgs_species_3) %>% summarise(n=max(presence_gene_nr)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'vir_gene_nonredundant'. You can override
## using the `.groups` argument.
toxin_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n)) %>% mutate(prop_present = (n_genes/36)*100)
## # A tibble: 10 × 3
##    wgs_species_3 n_genes prop_present
##    <chr>           <dbl>        <dbl>
##  1 SAU                27        75   
##  2 SCH                10        27.8 
##  3 SCON                8        22.2 
##  4 SDEV                7        19.4 
##  5 SEQ                 6        16.7 
##  6 SHAEM               8        22.2 
##  7 SPXYL               9        25   
##  8 SSC                 3         8.33
##  9 SSUC                7        19.4 
## 10 SXYL                8        22.2

3.2 Visualizations

3.2.1 Adherence

# ---- Adherence genes ----

adherence_sau <- adherence_long %>% filter(grepl("SAU",wgs_isolate_id)) 


adherence_plot_1 <- adherence_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_sau$wgs_isolate_id,adherence_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




adherence_sch <- adherence_long %>% filter(grepl("SCH",wgs_isolate_id)) 


adherence_plot_2 <- adherence_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_sch$wgs_isolate_id,adherence_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


adherence_shaem <- adherence_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


adherence_plot_3 <- adherence_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_shaem$wgs_isolate_id,adherence_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




adherence_ssc <- adherence_long %>% filter(grepl("SSC",wgs_isolate_id)) 


adherence_plot_4 <- adherence_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_ssc$wgs_isolate_id,adherence_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





adherence_ssuc <- adherence_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




adherence_plot_5 <- adherence_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(adherence_ssuc$wgs_isolate_id,adherence_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



adherence_ssxyl <- adherence_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

adherence_ssxyl$wgs_isolate_id <- fct_reorder(adherence_ssxyl$wgs_isolate_id,adherence_ssxyl$wgs_id)

adherence_spxyl <- adherence_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(adherence_long$wgs_isolate_id)
## NULL
adherence_spxyl$wgs_isolate_id <- fct_reorder(adherence_spxyl$wgs_isolate_id,adherence_spxyl$wgs_id)
             

adherence_sxyl_spxyl <- rbind(adherence_ssxyl,adherence_spxyl)                                 
#adherence_ssxyl$wgs_isolate_id <- fct_reorder(adherence_ssxyl$wgs_isolate_id,adherence_ssxyl$wgs_id)

table(adherence_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      30      30      30      30      30      30      30      30      30      30 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30
adherence_plot_6 <- adherence_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


adherence_scon <- adherence_long %>% filter(grepl("SCON",wgs_isolate_id)) 

adherence_scon$wgs_isolate_id <- fct_reorder(adherence_scon$wgs_isolate_id,adherence_scon$wgs_id)

adherence_sdev <- adherence_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(adherence_long$wgs_isolate_id)
## NULL
adherence_sdev$wgs_isolate_id <- fct_reorder(adherence_sdev$wgs_isolate_id,adherence_sdev$wgs_id)



adherence_seq <- adherence_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

adherence_seq$wgs_isolate_id <- fct_reorder(adherence_seq$wgs_isolate_id,adherence_seq$wgs_id)


adherence_other <- rbind(adherence_scon,adherence_sdev,adherence_seq)                                 

table(adherence_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      30      30      30      30      30      30      30      30      30      30 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30
adherence_plot_7 <- adherence_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Adherence",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),,axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

adherence_plot <- ggarrange(adherence_plot_1 + rremove("xlab")+ rremove("ylab"),adherence_plot_2 + rremove("xlab")+ rremove("ylab"),adherence_plot_3 + rremove("xlab")+ rremove("ylab"),adherence_plot_4 + rremove("xlab")+ rremove("ylab"),adherence_plot_5 + rremove("xlab")+ rremove("ylab"),adherence_plot_6 + rremove("xlab")+ rremove("ylab"),adherence_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2))

annotate_figure(adherence_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/adherence.png",width = 20, height = 40, units = "cm")

3.2.2 Exoenzymes

# ---- exoenzymes genes ----

exoenzymes_sau <- exoenzymes_long %>% filter(grepl("SAU",wgs_isolate_id)) 


exoenzymes_plot_1 <- exoenzymes_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_sau$wgs_isolate_id,exoenzymes_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




exoenzymes_sch <- exoenzymes_long %>% filter(grepl("SCH",wgs_isolate_id)) 


exoenzymes_plot_2 <- exoenzymes_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_sch$wgs_isolate_id,exoenzymes_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


exoenzymes_shaem <- exoenzymes_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


exoenzymes_plot_3 <- exoenzymes_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_shaem$wgs_isolate_id,exoenzymes_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




exoenzymes_ssc <- exoenzymes_long %>% filter(grepl("SSC",wgs_isolate_id)) 


exoenzymes_plot_4 <- exoenzymes_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_ssc$wgs_isolate_id,exoenzymes_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





exoenzymes_ssuc <- exoenzymes_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




exoenzymes_plot_5 <- exoenzymes_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(exoenzymes_ssuc$wgs_isolate_id,exoenzymes_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



exoenzymes_ssxyl <- exoenzymes_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

exoenzymes_ssxyl$wgs_isolate_id <- fct_reorder(exoenzymes_ssxyl$wgs_isolate_id,exoenzymes_ssxyl$wgs_id)

exoenzymes_spxyl <- exoenzymes_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(exoenzymes_long$wgs_isolate_id)
## NULL
exoenzymes_spxyl$wgs_isolate_id <- fct_reorder(exoenzymes_spxyl$wgs_isolate_id,exoenzymes_spxyl$wgs_id)


exoenzymes_sxyl_spxyl <- rbind(exoenzymes_ssxyl,exoenzymes_spxyl)                                 
#exoenzymes_ssxyl$wgs_isolate_id <- fct_reorder(exoenzymes_ssxyl$wgs_isolate_id,exoenzymes_ssxyl$wgs_id)

table(exoenzymes_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      18      18      18      18      18      18      18      18      18      18 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      18      18      18      18      18      18      18      18      18      18 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      18      18      18      18      18      18      18      18      18      18 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      18      18      18      18      18      18      18      18      18      18 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      18      18      18      18      18      18      18      18      18      18 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      18      18      18      18      18      18      18      18      18      18 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      18      18      18      18      18      18      18      18      18      18 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      18      18      18      18      18      18      18      18      18      18 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      18      18      18      18
exoenzymes_plot_6 <- exoenzymes_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


exoenzymes_scon <- exoenzymes_long %>% filter(grepl("SCON",wgs_isolate_id)) 

exoenzymes_scon$wgs_isolate_id <- fct_reorder(exoenzymes_scon$wgs_isolate_id,exoenzymes_scon$wgs_id)

exoenzymes_sdev <- exoenzymes_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(exoenzymes_long$wgs_isolate_id)
## NULL
exoenzymes_sdev$wgs_isolate_id <- fct_reorder(exoenzymes_sdev$wgs_isolate_id,exoenzymes_sdev$wgs_id)



exoenzymes_seq <- exoenzymes_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

exoenzymes_seq$wgs_isolate_id <- fct_reorder(exoenzymes_seq$wgs_isolate_id,exoenzymes_seq$wgs_id)


exoenzymes_other <- rbind(exoenzymes_scon,exoenzymes_sdev,exoenzymes_seq)                                 

table(exoenzymes_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      18      18      18      18      18      18      18      18      18      18 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      18      18      18      18      18      18      18      18      18      18 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      18      18      18      18      18      18      18      18      18      18 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      18      18      18      18      18      18      18      18      18      18 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      18      18      18      18      18      18      18      18      18      18 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      18      18      18      18      18      18      18      18      18      18 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      18      18      18      18      18      18      18      18      18      18 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      18      18      18      18      18      18      18      18      18      18 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      18      18      18      18
exoenzymes_plot_7 <- exoenzymes_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Exoenzymes",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

exoenzymes_plot <- ggarrange(exoenzymes_plot_1 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_2 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_3 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_4 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_5 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_6 + rremove("xlab")+ rremove("ylab"),exoenzymes_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2))

annotate_figure(exoenzymes_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/exoenzymes.png",width = 20, height = 40, units = "cm")



# plot adherence + exoenzymes

adherence_exoenzymes_plot <- ggarrange(
  adherence_plot_1 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_1 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_2 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_2 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_3 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_3 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_4 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_4 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_5 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_5 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_6 + rremove("xlab") + rremove("ylab"),
  exoenzymes_plot_6 + rremove("xlab") + rremove("ylab") ,
  adherence_plot_7 + rremove("ylab"),
  exoenzymes_plot_7 + rremove("ylab") ,
  ncol = 2, nrow = 7, common.legend = TRUE, align = "v",
  labels = c("A1", "A2", "B1", "B2", "C1", "C2", "D1", "D2", "E1", "E2", "F1", "F2", "G1", "G2"),
  heights = c(1.25,4.5, 4.25, 1.75,3,3.5, 2)
)

annotate_figure(adherence_exoenzymes_plot,
                bottom = text_grob("Gene type", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/adherence_exoenzymes.png",width = 40, height = 40, units = "cm")

3.2.3 Immune evasion

# ---- im_ev genes ----

im_ev_sau <- im_ev_long %>% filter(grepl("SAU",wgs_isolate_id)) 


im_ev_plot_1 <- im_ev_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_sau$wgs_isolate_id,im_ev_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




im_ev_sch <- im_ev_long %>% filter(grepl("SCH",wgs_isolate_id)) 


im_ev_plot_2 <- im_ev_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_sch$wgs_isolate_id,im_ev_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


im_ev_shaem <- im_ev_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


im_ev_plot_3 <- im_ev_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_shaem$wgs_isolate_id,im_ev_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




im_ev_ssc <- im_ev_long %>% filter(grepl("SSC",wgs_isolate_id)) 


im_ev_plot_4 <- im_ev_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_ssc$wgs_isolate_id,im_ev_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





im_ev_ssuc <- im_ev_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




im_ev_plot_5 <- im_ev_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(im_ev_ssuc$wgs_isolate_id,im_ev_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



im_ev_ssxyl <- im_ev_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

im_ev_ssxyl$wgs_isolate_id <- fct_reorder(im_ev_ssxyl$wgs_isolate_id,im_ev_ssxyl$wgs_id)

im_ev_spxyl <- im_ev_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(im_ev_long$wgs_isolate_id)
## NULL
im_ev_spxyl$wgs_isolate_id <- fct_reorder(im_ev_spxyl$wgs_isolate_id,im_ev_spxyl$wgs_id)


im_ev_sxyl_spxyl <- rbind(im_ev_ssxyl,im_ev_spxyl)                                 
#im_ev_ssxyl$wgs_isolate_id <- fct_reorder(im_ev_ssxyl$wgs_isolate_id,im_ev_ssxyl$wgs_id)

table(im_ev_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      34      34      34      34      34      34      34      34      34      34 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      34      34      34      34      34      34      34      34      34      34 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      34      34      34      34      34      34      34      34      34      34 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      34      34      34      34      34      34      34      34      34      34 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      34      34      34      34      34      34      34      34      34      34 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      34      34      34      34      34      34      34      34      34      34 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      34      34      34      34      34      34      34      34      34      34 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      34      34      34      34      34      34      34      34      34      34 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      34      34      34      34
im_ev_plot_6 <- im_ev_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


im_ev_scon <- im_ev_long %>% filter(grepl("SCON",wgs_isolate_id)) 

im_ev_scon$wgs_isolate_id <- fct_reorder(im_ev_scon$wgs_isolate_id,im_ev_scon$wgs_id)

im_ev_sdev <- im_ev_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(im_ev_long$wgs_isolate_id)
## NULL
im_ev_sdev$wgs_isolate_id <- fct_reorder(im_ev_sdev$wgs_isolate_id,im_ev_sdev$wgs_id)



im_ev_seq <- im_ev_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

im_ev_seq$wgs_isolate_id <- fct_reorder(im_ev_seq$wgs_isolate_id,im_ev_seq$wgs_id)


im_ev_other <- rbind(im_ev_scon,im_ev_sdev,im_ev_seq)                                 

table(im_ev_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      34      34      34      34      34      34      34      34      34      34 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      34      34      34      34      34      34      34      34      34      34 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      34      34      34      34      34      34      34      34      34      34 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      34      34      34      34      34      34      34      34      34      34 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      34      34      34      34      34      34      34      34      34      34 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      34      34      34      34      34      34      34      34      34      34 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      34      34      34      34      34      34      34      34      34      34 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      34      34      34      34      34      34      34      34      34      34 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      34      34      34      34
im_ev_plot_7 <- im_ev_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Immune evasion",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

im_ev_plot <- ggarrange(im_ev_plot_1 + rremove("xlab")+ rremove("ylab"),im_ev_plot_2 + rremove("xlab")+ rremove("ylab"),im_ev_plot_3 + rremove("xlab")+ rremove("ylab"),im_ev_plot_4 + rremove("xlab")+ rremove("ylab"),im_ev_plot_5 + rremove("xlab")+ rremove("ylab"),im_ev_plot_6 + rremove("xlab")+ rremove("ylab"),im_ev_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2))

annotate_figure(im_ev_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/im_ev.png",width = 20, height = 40, units = "cm")

3.2.5 Enterotoxins and exotoxins

# ---- enterotoxin genes ----

enterotoxin_sau <- enterotoxin_long %>% filter(grepl("SAU",wgs_isolate_id)) 


enterotoxin_plot_1 <- enterotoxin_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_sau$wgs_isolate_id,enterotoxin_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_sch <- enterotoxin_long %>% filter(grepl("SCH",wgs_isolate_id)) 


enterotoxin_plot_2 <- enterotoxin_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_sch$wgs_isolate_id,enterotoxin_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


enterotoxin_shaem <- enterotoxin_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


enterotoxin_plot_3 <- enterotoxin_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_shaem$wgs_isolate_id,enterotoxin_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssc <- enterotoxin_long %>% filter(grepl("SSC",wgs_isolate_id)) 

enterotoxin_plot_4 <- enterotoxin_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_ssc$wgs_isolate_id,enterotoxin_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssuc <- enterotoxin_long %>% filter(grepl("SSUC",wgs_isolate_id)) 

enterotoxin_plot_5 <- enterotoxin_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(enterotoxin_ssuc$wgs_isolate_id,enterotoxin_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

enterotoxin_ssxyl <- enterotoxin_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

enterotoxin_ssxyl$wgs_isolate_id <- fct_reorder(enterotoxin_ssxyl$wgs_isolate_id,enterotoxin_ssxyl$wgs_id)

enterotoxin_spxyl <- enterotoxin_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(enterotoxin_long$wgs_isolate_id)
## NULL
enterotoxin_spxyl$wgs_isolate_id <- fct_reorder(enterotoxin_spxyl$wgs_isolate_id,enterotoxin_spxyl$wgs_id)


enterotoxin_sxyl_spxyl <- rbind(enterotoxin_ssxyl,enterotoxin_spxyl)                                 
#enterotoxin_ssxyl$wgs_isolate_id <- fct_reorder(enterotoxin_ssxyl$wgs_isolate_id,enterotoxin_ssxyl$wgs_id)

table(enterotoxin_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      59      59      59      59      59      59      59      59      59      59 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      59      59      59      59      59      59      59      59      59      59 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      59      59      59      59      59      59      59      59      59      59 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      59      59      59      59      59      59      59      59      59      59 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      59      59      59      59      59      59      59      59      59      59 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      59      59      59      59      59      59      59      59      59      59 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      59      59      59      59      59      59      59      59      59      59 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      59      59      59      59      59      59      59      59      59      59 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      59      59      59      59
enterotoxin_plot_6 <- enterotoxin_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


enterotoxin_scon <- enterotoxin_long %>% filter(grepl("SCON",wgs_isolate_id)) 

enterotoxin_scon$wgs_isolate_id <- fct_reorder(enterotoxin_scon$wgs_isolate_id,enterotoxin_scon$wgs_id)

enterotoxin_sdev <- enterotoxin_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(enterotoxin_long$wgs_isolate_id)
## NULL
enterotoxin_sdev$wgs_isolate_id <- fct_reorder(enterotoxin_sdev$wgs_isolate_id,enterotoxin_sdev$wgs_id)



enterotoxin_seq <- enterotoxin_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

enterotoxin_seq$wgs_isolate_id <- fct_reorder(enterotoxin_seq$wgs_isolate_id,enterotoxin_seq$wgs_id)


enterotoxin_other <- rbind(enterotoxin_scon,enterotoxin_sdev,enterotoxin_seq)                                 

table(enterotoxin_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      59      59      59      59      59      59      59      59      59      59 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      59      59      59      59      59      59      59      59      59      59 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      59      59      59      59      59      59      59      59      59      59 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      59      59      59      59      59      59      59      59      59      59 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      59      59      59      59      59      59      59      59      59      59 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      59      59      59      59      59      59      59      59      59      59 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      59      59      59      59      59      59      59      59      59      59 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      59      59      59      59      59      59      59      59      59      59 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      59      59      59      59
enterotoxin_plot_7 <- enterotoxin_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.title.x = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

enterotoxin_plot <- ggarrange(enterotoxin_plot_1 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_2 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_3 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_4 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_5 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_6 + rremove("xlab")+ rremove("ylab"),enterotoxin_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2))

annotate_figure(enterotoxin_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/enterotoxin.png",width = 50, height = 40, units = "cm")

3.2.6 Other toxins

# ---- toxin genes ----

toxin_sau <- toxin_long %>% filter(grepl("SAU",wgs_isolate_id)) 


toxin_plot_1 <- toxin_sau %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_sau$wgs_isolate_id,toxin_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




toxin_sch <- toxin_long %>% filter(grepl("SCH",wgs_isolate_id)) 


toxin_plot_2 <- toxin_sch %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_sch$wgs_isolate_id,toxin_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


toxin_shaem <- toxin_long %>% filter(grepl("SHAEM",wgs_isolate_id)) 


toxin_plot_3 <- toxin_shaem %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_shaem$wgs_isolate_id,toxin_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())




toxin_ssc <- toxin_long %>% filter(grepl("SSC",wgs_isolate_id)) 


toxin_plot_4 <- toxin_ssc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_ssc$wgs_isolate_id,toxin_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())





toxin_ssuc <- toxin_long %>% filter(grepl("SSUC",wgs_isolate_id)) 




toxin_plot_5 <- toxin_ssuc %>%
  ggplot(aes(x = vir_gene_nonredundant, y = fct_reorder(toxin_ssuc$wgs_isolate_id,toxin_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())



toxin_ssxyl <- toxin_long %>% filter(grepl("SXYL",wgs_isolate_id)) 

toxin_ssxyl$wgs_isolate_id <- fct_reorder(toxin_ssxyl$wgs_isolate_id,toxin_ssxyl$wgs_id)

toxin_spxyl <- toxin_long %>% filter(grepl("SPXYL",wgs_isolate_id)) 

levels(toxin_long$wgs_isolate_id)
## NULL
toxin_spxyl$wgs_isolate_id <- fct_reorder(toxin_spxyl$wgs_isolate_id,toxin_spxyl$wgs_id)


toxin_sxyl_spxyl <- rbind(toxin_ssxyl,toxin_spxyl)                                 
#toxin_ssxyl$wgs_isolate_id <- fct_reorder(toxin_ssxyl$wgs_isolate_id,toxin_ssxyl$wgs_id)

table(toxin_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      30      30      30      30      30      30      30      30      30      30 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30
toxin_plot_6 <- toxin_sxyl_spxyl %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())


toxin_scon <- toxin_long %>% filter(grepl("SCON",wgs_isolate_id)) 

toxin_scon$wgs_isolate_id <- fct_reorder(toxin_scon$wgs_isolate_id,toxin_scon$wgs_id)

toxin_sdev <- toxin_long %>% filter(grepl("SDEV",wgs_isolate_id)) 

levels(toxin_long$wgs_isolate_id)
## NULL
toxin_sdev$wgs_isolate_id <- fct_reorder(toxin_sdev$wgs_isolate_id,toxin_sdev$wgs_id)



toxin_seq <- toxin_long %>% filter(grepl("SEQ",wgs_isolate_id)) 

toxin_seq$wgs_isolate_id <- fct_reorder(toxin_seq$wgs_isolate_id,toxin_seq$wgs_id)


toxin_other <- rbind(toxin_scon,toxin_sdev,toxin_seq)                                 

table(toxin_long$wgs_isolate_id)
## 
##    SAU1    SAU2    SAU3    SCH1   SCH10   SCH11   SCH12   SCH13   SCH14   SCH15 
##      30      30      30      30      30      30      30      30      30      30 
##   SCH16   SCH17   SCH18   SCH19    SCH2   SCH20   SCH21    SCH3    SCH4    SCH5 
##      30      30      30      30      30      30      30      30      30      30 
##    SCH6    SCH7    SCH8    SCH9   SCON1   SDEV1   SDEV2    SEQ1  SHAEM1 SHAEM10 
##      30      30      30      30      30      30      30      30      30      30 
## SHAEM11 SHAEM12 SHAEM13 SHAEM14 SHAEM15 SHAEM16 SHAEM17 SHAEM18 SHAEM19  SHAEM2 
##      30      30      30      30      30      30      30      30      30      30 
##  SHAEM3  SHAEM4  SHAEM5  SHAEM6  SHAEM7  SHAEM8  SHAEM9  SPXYL1  SPXYL2  SPXYL3 
##      30      30      30      30      30      30      30      30      30      30 
##  SPXYL4  SPXYL5    SSC1    SSC2    SSC3    SSC4    SSC5    SSC6   SSUC1  SSUC10 
##      30      30      30      30      30      30      30      30      30      30 
##  SSUC11  SSUC12   SSUC2   SSUC3   SSUC4   SSUC5   SSUC6   SSUC7   SSUC8   SSUC9 
##      30      30      30      30      30      30      30      30      30      30 
##    SUB1   SXYL1  SXYL10  SXYL11  SXYL12  SXYL13   SXYL2   SXYL3   SXYL4   SXYL5 
##      30      30      30      30      30      30      30      30      30      30 
##   SXYL6   SXYL7   SXYL8   SXYL9 
##      30      30      30      30
toxin_plot_7 <- toxin_other %>%
  ggplot(aes(x = vir_gene_nonredundant, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.title.x = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1))




# Multipanel figure

toxin_plot <- ggarrange(toxin_plot_1 + rremove("xlab")+ rremove("ylab"),toxin_plot_2 + rremove("xlab")+ rremove("ylab"),toxin_plot_3 + rremove("xlab")+ rremove("ylab"),toxin_plot_4 + rremove("xlab")+ rremove("ylab"),toxin_plot_5 + rremove("xlab")+ rremove("ylab"),toxin_plot_6 + rremove("xlab")+ rremove("ylab"),toxin_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(toxin_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/toxin.png",width = 20, height = 40, units = "cm")

4 AMR genes

4.1 Summary

# Presence of AMR related genes

amr_long$sau <- ifelse(amr_long$wgs_species_3 == "SAU", 1, 0)

amr_long <- amr_long %>% filter(wgs_species_3!="SUB")

amr_long_summary <- amr_long %>% group_by(gene_abbreviation,wgs_species_3) %>% summarise(n=max(presence)) %>% filter(n==1) %>% filter(wgs_species_3!="SUB")
## `summarise()` has grouped output by 'gene_abbreviation'. You can override using
## the `.groups` argument.
amr_long_summary %>% ungroup() %>% group_by(wgs_species_3) %>% summarise(n_genes=sum(n))
## # A tibble: 10 × 2
##    wgs_species_3 n_genes
##    <chr>           <dbl>
##  1 SAU                15
##  2 SCH                 1
##  3 SCON                6
##  4 SDEV                3
##  5 SEQ                 2
##  6 SHAEM               4
##  7 SPXYL               6
##  8 SSC                 4
##  9 SSUC                1
## 10 SXYL                4
table1(~presence_gene|gene_abbreviation,data=amr_long)
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 29 columns. Are you sure this is what you want?
AAC3
(N=83)
ANT9
(N=83)
APH3-PRIME
(N=83)
ARLR
(N=83)
ARLS
(N=83)
BLAZ
(N=83)
DHAP
(N=83)
ERM
(N=83)
FOSB
(N=83)
FOSD
(N=83)
FUSF
(N=83)
LMRS
(N=83)
LNUA
(N=83)
MECA
(N=83)
MECC
(N=83)
MECI
(N=83)
MEPA
(N=83)
MEPB
(N=83)
MEPR
(N=83)
MGRA
(N=83)
MPHC
(N=83)
MSRA
(N=83)
NORA
(N=83)
NORB
(N=83)
RLMH
(N=83)
SALA
(N=83)
TET38
(N=83)
TETK
(N=83)
Overall
(N=2324)
presence_gene
No 80 (96.4%) 82 (98.8%) 41 (49.4%) 80 (96.4%) 80 (96.4%) 78 (94.0%) 80 (96.4%) 78 (94.0%) 81 (97.6%) 81 (97.6%) 82 (98.8%) 80 (96.4%) 77 (92.8%) 77 (92.8%) 78 (94.0%) 78 (94.0%) 80 (96.4%) 80 (96.4%) 80 (96.4%) 59 (71.1%) 69 (83.1%) 82 (98.8%) 80 (96.4%) 80 (96.4%) 39 (47.0%) 77 (92.8%) 80 (96.4%) 82 (98.8%) 2121 (91.3%)
Yes 3 (3.6%) 1 (1.2%) 42 (50.6%) 3 (3.6%) 3 (3.6%) 5 (6.0%) 3 (3.6%) 5 (6.0%) 2 (2.4%) 2 (2.4%) 1 (1.2%) 3 (3.6%) 6 (7.2%) 6 (7.2%) 5 (6.0%) 5 (6.0%) 3 (3.6%) 3 (3.6%) 3 (3.6%) 24 (28.9%) 14 (16.9%) 1 (1.2%) 3 (3.6%) 3 (3.6%) 44 (53.0%) 6 (7.2%) 3 (3.6%) 1 (1.2%) 203 (8.7%)
table1(~presence_gene|wgs_species_3:gene_abbreviation,data=amr_long)
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 308 columns. Are you sure this is what you want?
SAU
SCH
SCON
SDEV
SEQ
SHAEM
SPXYL
SSC
SSUC
SXYL
Overall
AAC3
(N=3)
ANT9
(N=3)
APH3-PRIME
(N=3)
ARLR
(N=3)
ARLS
(N=3)
BLAZ
(N=3)
DHAP
(N=3)
ERM
(N=3)
FOSB
(N=3)
FOSD
(N=3)
FUSF
(N=3)
LMRS
(N=3)
LNUA
(N=3)
MECA
(N=3)
MECC
(N=3)
MECI
(N=3)
MEPA
(N=3)
MEPB
(N=3)
MEPR
(N=3)
MGRA
(N=3)
MPHC
(N=3)
MSRA
(N=3)
NORA
(N=3)
NORB
(N=3)
RLMH
(N=3)
SALA
(N=3)
TET38
(N=3)
TETK
(N=3)
AAC3
(N=21)
ANT9
(N=21)
APH3-PRIME
(N=21)
ARLR
(N=21)
ARLS
(N=21)
BLAZ
(N=21)
DHAP
(N=21)
ERM
(N=21)
FOSB
(N=21)
FOSD
(N=21)
FUSF
(N=21)
LMRS
(N=21)
LNUA
(N=21)
MECA
(N=21)
MECC
(N=21)
MECI
(N=21)
MEPA
(N=21)
MEPB
(N=21)
MEPR
(N=21)
MGRA
(N=21)
MPHC
(N=21)
MSRA
(N=21)
NORA
(N=21)
NORB
(N=21)
RLMH
(N=21)
SALA
(N=21)
TET38
(N=21)
TETK
(N=21)
AAC3
(N=1)
ANT9
(N=1)
APH3-PRIME
(N=1)
ARLR
(N=1)
ARLS
(N=1)
BLAZ
(N=1)
DHAP
(N=1)
ERM
(N=1)
FOSB
(N=1)
FOSD
(N=1)
FUSF
(N=1)
LMRS
(N=1)
LNUA
(N=1)
MECA
(N=1)
MECC
(N=1)
MECI
(N=1)
MEPA
(N=1)
MEPB
(N=1)
MEPR
(N=1)
MGRA
(N=1)
MPHC
(N=1)
MSRA
(N=1)
NORA
(N=1)
NORB
(N=1)
RLMH
(N=1)
SALA
(N=1)
TET38
(N=1)
TETK
(N=1)
AAC3
(N=2)
ANT9
(N=2)
APH3-PRIME
(N=2)
ARLR
(N=2)
ARLS
(N=2)
BLAZ
(N=2)
DHAP
(N=2)
ERM
(N=2)
FOSB
(N=2)
FOSD
(N=2)
FUSF
(N=2)
LMRS
(N=2)
LNUA
(N=2)
MECA
(N=2)
MECC
(N=2)
MECI
(N=2)
MEPA
(N=2)
MEPB
(N=2)
MEPR
(N=2)
MGRA
(N=2)
MPHC
(N=2)
MSRA
(N=2)
NORA
(N=2)
NORB
(N=2)
RLMH
(N=2)
SALA
(N=2)
TET38
(N=2)
TETK
(N=2)
AAC3
(N=1)
ANT9
(N=1)
APH3-PRIME
(N=1)
ARLR
(N=1)
ARLS
(N=1)
BLAZ
(N=1)
DHAP
(N=1)
ERM
(N=1)
FOSB
(N=1)
FOSD
(N=1)
FUSF
(N=1)
LMRS
(N=1)
LNUA
(N=1)
MECA
(N=1)
MECC
(N=1)
MECI
(N=1)
MEPA
(N=1)
MEPB
(N=1)
MEPR
(N=1)
MGRA
(N=1)
MPHC
(N=1)
MSRA
(N=1)
NORA
(N=1)
NORB
(N=1)
RLMH
(N=1)
SALA
(N=1)
TET38
(N=1)
TETK
(N=1)
AAC3
(N=19)
ANT9
(N=19)
APH3-PRIME
(N=19)
ARLR
(N=19)
ARLS
(N=19)
BLAZ
(N=19)
DHAP
(N=19)
ERM
(N=19)
FOSB
(N=19)
FOSD
(N=19)
FUSF
(N=19)
LMRS
(N=19)
LNUA
(N=19)
MECA
(N=19)
MECC
(N=19)
MECI
(N=19)
MEPA
(N=19)
MEPB
(N=19)
MEPR
(N=19)
MGRA
(N=19)
MPHC
(N=19)
MSRA
(N=19)
NORA
(N=19)
NORB
(N=19)
RLMH
(N=19)
SALA
(N=19)
TET38
(N=19)
TETK
(N=19)
AAC3
(N=5)
ANT9
(N=5)
APH3-PRIME
(N=5)
ARLR
(N=5)
ARLS
(N=5)
BLAZ
(N=5)
DHAP
(N=5)
ERM
(N=5)
FOSB
(N=5)
FOSD
(N=5)
FUSF
(N=5)
LMRS
(N=5)
LNUA
(N=5)
MECA
(N=5)
MECC
(N=5)
MECI
(N=5)
MEPA
(N=5)
MEPB
(N=5)
MEPR
(N=5)
MGRA
(N=5)
MPHC
(N=5)
MSRA
(N=5)
NORA
(N=5)
NORB
(N=5)
RLMH
(N=5)
SALA
(N=5)
TET38
(N=5)
TETK
(N=5)
AAC3
(N=6)
ANT9
(N=6)
APH3-PRIME
(N=6)
ARLR
(N=6)
ARLS
(N=6)
BLAZ
(N=6)
DHAP
(N=6)
ERM
(N=6)
FOSB
(N=6)
FOSD
(N=6)
FUSF
(N=6)
LMRS
(N=6)
LNUA
(N=6)
MECA
(N=6)
MECC
(N=6)
MECI
(N=6)
MEPA
(N=6)
MEPB
(N=6)
MEPR
(N=6)
MGRA
(N=6)
MPHC
(N=6)
MSRA
(N=6)
NORA
(N=6)
NORB
(N=6)
RLMH
(N=6)
SALA
(N=6)
TET38
(N=6)
TETK
(N=6)
AAC3
(N=12)
ANT9
(N=12)
APH3-PRIME
(N=12)
ARLR
(N=12)
ARLS
(N=12)
BLAZ
(N=12)
DHAP
(N=12)
ERM
(N=12)
FOSB
(N=12)
FOSD
(N=12)
FUSF
(N=12)
LMRS
(N=12)
LNUA
(N=12)
MECA
(N=12)
MECC
(N=12)
MECI
(N=12)
MEPA
(N=12)
MEPB
(N=12)
MEPR
(N=12)
MGRA
(N=12)
MPHC
(N=12)
MSRA
(N=12)
NORA
(N=12)
NORB
(N=12)
RLMH
(N=12)
SALA
(N=12)
TET38
(N=12)
TETK
(N=12)
AAC3
(N=13)
ANT9
(N=13)
APH3-PRIME
(N=13)
ARLR
(N=13)
ARLS
(N=13)
BLAZ
(N=13)
DHAP
(N=13)
ERM
(N=13)
FOSB
(N=13)
FOSD
(N=13)
FUSF
(N=13)
LMRS
(N=13)
LNUA
(N=13)
MECA
(N=13)
MECC
(N=13)
MECI
(N=13)
MEPA
(N=13)
MEPB
(N=13)
MEPR
(N=13)
MGRA
(N=13)
MPHC
(N=13)
MSRA
(N=13)
NORA
(N=13)
NORB
(N=13)
RLMH
(N=13)
SALA
(N=13)
TET38
(N=13)
TETK
(N=13)
AAC3
(N=83)
ANT9
(N=83)
APH3-PRIME
(N=83)
ARLR
(N=83)
ARLS
(N=83)
BLAZ
(N=83)
DHAP
(N=83)
ERM
(N=83)
FOSB
(N=83)
FOSD
(N=83)
FUSF
(N=83)
LMRS
(N=83)
LNUA
(N=83)
MECA
(N=83)
MECC
(N=83)
MECI
(N=83)
MEPA
(N=83)
MEPB
(N=83)
MEPR
(N=83)
MGRA
(N=83)
MPHC
(N=83)
MSRA
(N=83)
NORA
(N=83)
NORB
(N=83)
RLMH
(N=83)
SALA
(N=83)
TET38
(N=83)
TETK
(N=83)
presence_gene
Yes 3 (100%) 0 (0%) 3 (100%) 3 (100%) 3 (100%) 0 (0%) 3 (100%) 0 (0%) 1 (33.3%) 0 (0%) 0 (0%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 0 (0%) 0 (0%) 3 (100%) 3 (100%) 3 (100%) 0 (0%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 20 (95.2%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 2 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 19 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (15.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 19 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 19 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 4 (80.0%) 1 (20.0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 5 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (16.7%) 0 (0%) 0 (0%) 2 (33.3%) 6 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 6 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 12 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (38.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (7.7%) 0 (0%) 1 (7.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 7 (53.8%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (3.6%) 1 (1.2%) 42 (50.6%) 3 (3.6%) 3 (3.6%) 5 (6.0%) 3 (3.6%) 5 (6.0%) 2 (2.4%) 2 (2.4%) 1 (1.2%) 3 (3.6%) 6 (7.2%) 6 (7.2%) 5 (6.0%) 5 (6.0%) 3 (3.6%) 3 (3.6%) 3 (3.6%) 24 (28.9%) 14 (16.9%) 1 (1.2%) 3 (3.6%) 3 (3.6%) 44 (53.0%) 6 (7.2%) 3 (3.6%) 1 (1.2%)
No 0 (0%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 3 (100%) 0 (0%) 3 (100%) 2 (66.7%) 3 (100%) 3 (100%) 0 (0%) 3 (100%) 3 (100%) 3 (100%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 3 (100%) 3 (100%) 0 (0%) 0 (0%) 0 (0%) 3 (100%) 0 (0%) 3 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 21 (100%) 1 (4.8%) 21 (100%) 21 (100%) 21 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 2 (100%) 2 (100%) 0 (0%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 0 (0%) 2 (100%) 2 (100%) 2 (100%) 2 (100%) 0 (0%) 2 (100%) 2 (100%) 2 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 0 (0%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 1 (100%) 19 (100%) 19 (100%) 0 (0%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 16 (84.2%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 0 (0%) 19 (100%) 19 (100%) 19 (100%) 19 (100%) 0 (0%) 19 (100%) 19 (100%) 19 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 0 (0%) 5 (100%) 1 (20.0%) 4 (80.0%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 0 (0%) 0 (0%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 0 (0%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 5 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 5 (83.3%) 6 (100%) 6 (100%) 4 (66.7%) 0 (0%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 6 (100%) 0 (0%) 6 (100%) 6 (100%) 12 (100%) 12 (100%) 0 (0%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 12 (100%) 13 (100%) 13 (100%) 8 (61.5%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 12 (92.3%) 13 (100%) 12 (92.3%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 6 (46.2%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 13 (100%) 80 (96.4%) 82 (98.8%) 41 (49.4%) 80 (96.4%) 80 (96.4%) 78 (94.0%) 80 (96.4%) 78 (94.0%) 81 (97.6%) 81 (97.6%) 82 (98.8%) 80 (96.4%) 77 (92.8%) 77 (92.8%) 78 (94.0%) 78 (94.0%) 80 (96.4%) 80 (96.4%) 80 (96.4%) 59 (71.1%) 69 (83.1%) 82 (98.8%) 80 (96.4%) 80 (96.4%) 39 (47.0%) 77 (92.8%) 80 (96.4%) 82 (98.8%)

4.2 Visualizations

# ---- amr genes ----

# Staphylococcus aureus

amr_plot_1 <- amr_sau %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_sau$wgs_isolate_id,amr_sau$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus aureus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus chromogenes

amr_plot_2 <- amr_sch %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_sch$wgs_isolate_id,amr_sch$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus chromogenes") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus haemolyticus

amr_plot_3 <- amr_shaem %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_shaem$wgs_isolate_id,amr_shaem$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus haemolyticus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus sciuri

amr_plot_4 <- amr_ssc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_ssc$wgs_isolate_id,amr_ssc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus sciuri") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

#Staphylococcus succinus

amr_plot_5 <- amr_ssuc %>%
  ggplot(aes(x = gene_abbreviation, y = fct_reorder(amr_ssuc$wgs_isolate_id,amr_ssuc$wgs_id))) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus succinus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Staphylococcus xylosus/saprophytiucs

amr_plot_6 <- amr_sxyl_spxyl %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "Gene ID",fill="Presence gene",title="Staphylococcus xylosus/pseudoxylosus") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x=element_blank(),axis.title.x = element_blank())

# Others

amr_plot_7 <- amr_other %>%
  ggplot(aes(x = gene_abbreviation, y = wgs_isolate_id)) +
  geom_tile(aes(fill = presence_gene), color = "black") +
  scale_fill_manual(labels = c("No", "Yes"), values = c("white", "#009999")) +
  labs(y = "Isolate ID", x = "ARGs",fill="Presence gene",title="Other") +
  scale_y_discrete(limits = rev) +
  theme(plot.title=element_text(face="italic"),axis.text.x = element_text(angle = 45, hjust = 1))

# Multipanel figure with different species

amr_plot <- ggarrange(amr_plot_1 + rremove("xlab")+ rremove("ylab"),amr_plot_2 + rremove("xlab")+ rremove("ylab"),amr_plot_3 + rremove("xlab")+ rremove("ylab"),amr_plot_4 + rremove("xlab")+ rremove("ylab"),amr_plot_5 + rremove("xlab")+ rremove("ylab"),amr_plot_6 + rremove("xlab")+ rremove("ylab"),amr_plot_7 + rremove("xlab")+ rremove("ylab"),  ncol = 1, nrow = 7, common.legend = TRUE,align ="v",labels = c("A", "B", "C","D","E","F","G"),heights = c(1.25,4.5,4.25,1.75,3,3.5,2.25))

annotate_figure(amr_plot,
                bottom = text_grob("Gene", color = "black",
                                   hjust = 0.75, x = 0.5),
                left = text_grob("Isolate ID", color = "black", rot = 90, size = 12))

ggsave(plot = last_plot(),"./figures/amr.png",width = 20, height = 40, units = "cm")